Load required packages

# Install EBImage (Bioconductor) on first run.
# requireNamespace() checks availability without attaching the package and
# without require()'s FALSE-on-failure footgun; EBImage is attached
# explicitly with library() further down.
# NOTE(review): biocLite() is deprecated in newer Bioconductor releases;
# prefer BiocManager::install("EBImage") when upgrading R.
if(!requireNamespace("EBImage", quietly = TRUE)){
  source("https://bioconductor.org/biocLite.R")
  biocLite("EBImage")
}
## Loading required package: EBImage
## Warning: package 'EBImage' was built under R version 3.4.3
# Install gbm from CRAN on first run; requireNamespace() avoids require()'s
# attach side effect — the package is attached with library() below.
if(!requireNamespace("gbm", quietly = TRUE)){
  install.packages("gbm")
}
## Loading required package: gbm
## Warning: package 'gbm' was built under R version 3.4.4
## Loaded gbm 2.1.4
# Attach image-processing and GBM packages used throughout this script.
library("EBImage")
library("gbm")

# Install xgboost from CRAN on first run; requireNamespace() checks
# availability without attaching — library() below does the attaching.
if(!requireNamespace("xgboost", quietly = TRUE)){
  install.packages("xgboost")
}
## Loading required package: xgboost
## Warning: package 'xgboost' was built under R version 3.4.4
# Install plotly from CRAN on first run; requireNamespace() checks
# availability without attaching — library() below does the attaching.
if(!requireNamespace("plotly", quietly = TRUE)){
  install.packages("plotly")
}
## Loading required package: plotly
## Warning: package 'plotly' was built under R version 3.4.4
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:xgboost':
## 
##     slice
## The following object is masked from 'package:EBImage':
## 
##     toRGB
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Attach every package the analysis needs. (EBImage and gbm were already
# attached above; re-attaching is a harmless no-op.)
library(EBImage)
library(gbm)
library(xgboost)
library(plotly)

Step 0: specify directories.

Set the working directory to the image folder. In order to obtain reproducible results, call set.seed() whenever randomization is used.

# Fix the RNG seed so CV fold assignments are reproducible across runs.
set.seed(2018)
# NOTE(review): machine-specific absolute path — must be edited for each
# checkout; consider a relative path or a project-root helper instead.
setwd("C:/43/paraalel/Fall2018-Proj3-Sec1-grp6")

Provide directories for training images. Low-resolution (LR) image set and High-resolution (HR) image set will be in different subfolders.

# Training data layout: LR/ and HR/ image subfolders plus a label CSV.
# paste0() replaces the more verbose paste(..., sep="").
train_dir <- "../data/train_set/" # This will be modified for different data sets.

train_LR_dir <- paste0(train_dir, "LR/")           # low-resolution images
train_HR_dir <- paste0(train_dir, "HR/")           # high-resolution images
train_label_path <- paste0(train_dir, "label.csv") # labels for training

Step 1: set up controls for evaluation experiments.

In this chunk, we have a set of controls for the evaluation experiments.

# Switches controlling which stages of the experiment are executed.
run.cv            <- TRUE  # run cross-validation on the training set
K                 <- 3     # number of CV folds
run.feature.train <- TRUE  # process features for training set
run.test          <- TRUE  # run evaluation on an independent test set
run.feature.test  <- TRUE  # process features for test set

Using cross-validation or independent test set evaluation, we compare the performance of models with different specifications. In the baseline, we use GBM with different depth and number of trees. In the advanced model, we use XGBoost with different max_depth, eta, subsample, and min_child_weight. In the following chunk, we list the depth and trees corresponding to the GBM model that we will compare. Also, we list the max_depth, eta, subsample, and min_child_weight corresponding to the XGBoost model that we will compare.

# GBM tuning grid: interaction depth x number of trees (2 x 2 = 4 rows).
model_values <- expand.grid(
  depth   = c(3, 13),
  n.trees = c(150, 300)
)

# XGBoost tuning grid: 3 x 2 x 2 x 3 = 36 parameter combinations.
model_values_xgb <- expand.grid(
  max_depth        = c(4, 6, 8),
  eta              = c(0.3, 0.5),
  subsample        = c(0.5, 0.8),
  min_child_weight = c(4, 6, 8)
)

Step 2: construct features and responses

feature.R wraps for feature engineering functions and options.

#setwd("~/Documents/GitHub/Fall2018-Proj3-Sec1-grp6")
# Feature-engineering functions (defines feature() used below).
source("../lib/feature.R")

# Build training features/labels from the LR/HR image pairs, timing the step.
tm_feature_train <- NA
if(run.feature.train){
  tm_feature_train <- system.time(dat_train <- feature(train_LR_dir, train_HR_dir))
  feat_train <- dat_train$feature  # predictors returned by feature()
  label_train <- dat_train$label   # responses returned by feature()
}

Step 3: Train a classification model with training images

Call the train model and test model from library.

Model1: GBM

# Parallel backend used by the parallel GBM training code.
library("doParallel")
## Warning: package 'doParallel' was built under R version 3.4.4
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.4.4
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 3.4.4
## Loading required package: parallel
library("foreach")
# Project library: training and testing functions sourced from ../lib.
source("../lib/train.gbmpar.R")
source("../lib/test.R")
source("../lib/train.xgboost.R")

Model selection with cross-validation (GBM)

  • Do model selection by choosing among different values of training model parameters, that is, the interaction depth and n.trees for GBM
# Cross-validation driver for GBM (defines cv.function.gbmpar()).
source("../lib/cross_validation_gbmpar.R")

# For each (depth, n.trees) row, record c(mean CV error, sd) over K folds.
# seq_len() is used instead of 1:nrow(...) so a zero-row grid yields an
# empty loop rather than iterating over c(1, 0).
if(run.cv){
  err_cv.gbmpar <- array(dim=c(nrow(model_values), 2))
  for(k in seq_len(nrow(model_values))) {
    cat("k=", k, "\n")
    err_cv.gbmpar[k,] <- cv.function.gbmpar(feat_train, label_train, model_values[k, ], K)
  }
  save(err_cv.gbmpar, file="../output/err_cv.gbmpar.RData")
}
## k= 1 
## k= 2 
## k= 3 
## k= 4
err_cv.gbmpar
##             [,1]         [,2]
## [1,] 0.003778984 1.959322e-04
## [2,] 0.003714726 9.298758e-04
## [3,] 0.003630574 1.800356e-04
## [4,] 0.003515067 6.315562e-05

Visualize cross-validation results.

# Timestamp the run, then plot mean CV error per parameter combination.
print(Sys.time())
## [1] "2018-11-07 01:18:15 EST"
if(run.cv){
  load("../output/err_cv.gbmpar.RData")
  # One factor level per (depth, n.trees) pair, used as the x-axis.
  mv <- factor(paste("(", model_values$depth,",", model_values$n.trees, ")"))
  plot(err_cv.gbmpar[,1]~mv, xlab="(Interaction Depth, Number of trees)", ylab="CV Error",
       main="Cross Validation Error", type="n", ylim=c(0, 0.005))
  points(err_cv.gbmpar[,1]~mv, col="blue", pch=16)
  lines(err_cv.gbmpar[,1]~mv, col="blue")
  #arrows(mv,err_cv[,1]-err_cv[,2], mv, err_cv[,1]+err_cv[,2], 
        #length=0.1, angle=90, code=3)
}

  • Choose the “best” parameter value
# Default to the first parameter combination when CV is skipped.
# Bug fix: model_values[1] selects the first COLUMN (a one-column data
# frame containing only depth), so model_best$n.trees would be NULL when
# run.cv is FALSE; [1, ] selects the first ROW as intended.
model_best <- model_values[1, ]
if(run.cv){
  # Pick the row with the smallest mean CV error.
  model_best <- model_values[which.min(err_cv.gbmpar[,1]),]
}

# Winning (depth, n.trees) pair, passed to train.gbmpar() below.
par_best <- list(depth=model_best$depth, n.trees=model_best$n.trees)
par_best
## $depth
## [1] 13
## 
## $n.trees
## [1] 300
# PSNR = 20*log10(MAX) - 10*log10(MSE). Pixel intensities are assumed to
# lie in [0, 1], so MAX = 1 and the first term evaluates to 0; the best
# (minimum) CV error is used as the MSE.
PSNR_gbm <- 20*log10(1) - 10 * log10(err_cv.gbmpar[which.min(err_cv.gbmpar[,1]),1])
PSNR_gbm
## [1] 24.54066
  • Train the model with the entire training set using the selected model (model parameter) via cross-validation.
# Fit GBM on the entire training set with the CV-selected parameters,
# timing the fit, and persist the model for the test stage.
tm_train_gbm=NA
tm_train_gbm <- system.time(fit_train.gbmpar <- train.gbmpar(feat_train, label_train, par_best))
save(fit_train.gbmpar, file="../output/fit_train.gbmpar.RData")

Model 2: XGBoost

Model selection with cross-validation (XGBoost)

  • Do model selection by choosing among different values of training model parameters, that is, the interaction max_depth, eta, subsample, min_child_weight in this example.
# Cross-validation driver for XGBoost (defines cv.function.xgb()).
source("../lib/cross_validation_xgb.R")

# For each of the 36 parameter rows, record c(mean CV error, sd) over K
# folds. seq_len() replaces 1:nrow(...) so an empty grid cannot iterate
# over c(1, 0).
if(run.cv){
  err_cv.xgb <- array(dim=c(nrow(model_values_xgb), 2))
  for(k in seq_len(nrow(model_values_xgb))){
    cat("k=", k, "\n")
    err_cv.xgb[k,] <- cv.function.xgb(X.train=feat_train, y.train=label_train, modelvalues=model_values_xgb[k, ], K=K)
  }
  save(err_cv.xgb, file="../output/err_cv.xgb.RData")
}
## k= 1 
## k= 2 
## k= 3 
## k= 4 
## k= 5 
## k= 6 
## k= 7 
## k= 8 
## k= 9 
## k= 10 
## k= 11 
## k= 12 
## k= 13 
## k= 14 
## k= 15 
## k= 16 
## k= 17 
## k= 18 
## k= 19 
## k= 20 
## k= 21 
## k= 22 
## k= 23 
## k= 24 
## k= 25 
## k= 26 
## k= 27 
## k= 28 
## k= 29 
## k= 30 
## k= 31 
## k= 32 
## k= 33 
## k= 34 
## k= 35 
## k= 36
err_cv.xgb
##              [,1]         [,2]
##  [1,] 0.003399785 2.812109e-04
##  [2,] 0.003469907 3.944959e-04
##  [3,] 0.003516291 3.885324e-04
##  [4,] 0.003627413 5.304099e-04
##  [5,] 0.003725011 4.045517e-04
##  [6,] 0.003785855 1.514141e-04
##  [7,] 0.003319649 2.433532e-04
##  [8,] 0.003437392 3.878908e-04
##  [9,] 0.003561283 3.808719e-04
## [10,] 0.003415979 2.872261e-04
## [11,] 0.003486052 3.253142e-04
## [12,] 0.003586115 2.960904e-04
## [13,] 0.003366679 2.750453e-04
## [14,] 0.003422762 1.666533e-04
## [15,] 0.003464277 3.965385e-04
## [16,] 0.003436438 3.243575e-04
## [17,] 0.003571086 2.011602e-04
## [18,] 0.003686769 2.675746e-04
## [19,] 0.003299866 2.640279e-04
## [20,] 0.003396954 1.075917e-04
## [21,] 0.003438341 2.121268e-04
## [22,] 0.003402661 2.170707e-04
## [23,] 0.003474652 1.810802e-04
## [24,] 0.003519362 3.724450e-04
## [25,] 0.003356108 1.053242e-04
## [26,] 0.003374840 3.315921e-04
## [27,] 0.003400675 1.931607e-04
## [28,] 0.003387110 2.945585e-04
## [29,] 0.003489634 3.269490e-04
## [30,] 0.003667965 3.602869e-04
## [31,] 0.003299594 2.641026e-04
## [32,] 0.003371867 3.724390e-04
## [33,] 0.003343184 2.429026e-04
## [34,] 0.003354856 2.465940e-04
## [35,] 0.003495532 8.593389e-05
## [36,] 0.003503381 2.418258e-04

Visualize cross-validation results.

plot_ly(x=model_values_xgb$max_depth, y=model_values_xgb$eta, z=err_cv.xgb, type="surface")
  • Choose the “best” parameter value
# Default to the first parameter combination when CV is skipped.
# Bug fix: model_values_xgb[1] selects the first COLUMN (max_depth only),
# so eta/subsample/min_child_weight would be NULL when run.cv is FALSE;
# [1, ] selects the first ROW as intended.
model_best_xgb <- model_values_xgb[1, ]
if(run.cv){
  # Pick the row with the smallest mean CV error.
  model_best_xgb <- model_values_xgb[which.min(err_cv.xgb[,1]),]
}

# Winning parameter set, passed to train.xgboost() below.
par_best_xgb <- list(max_depth=model_best_xgb$max_depth, eta=model_best_xgb$eta, subsample = model_best_xgb$subsample, min_child_weight = model_best_xgb$min_child_weight)
#par_best_xgb <- list()

# PSNR with MAX = 1 (intensities in [0, 1]); the 20*log10(1) term is 0.
PSNR_xgb <- 20*log10(1) - 10 * log10(err_cv.xgb[which.min(err_cv.xgb[,1]),1])
PSNR_xgb
## [1] 24.81539
  • Train the model with the entire training set using the selected model (model parameter) via cross-validation.
# Fit XGBoost on the entire training set with the CV-selected parameters,
# timing the fit, and persist the model for the test stage.
tm_train_xgb=NA
tm_train_xgb <- system.time(fit_train.xgb <- train.xgboost(feat_train, label_train, par_best_xgb))
save(fit_train.xgb, file="../output/fit_train_xgb.RData")

Step 4: Super-resolution for test images

Feed the final trained model with the completely held-out testing data. + superResolution.R + Input: a path that points to the folder of low-resolution test images. + Input: a path that points to the folder (empty) of high-resolution test images. + Input: an R object that contains tuned predictors. + Output: construct high-resolution versions for each low-resolution test image.

# NOTE(review): machine-specific absolute path — paths below are now
# relative to the project root instead of the earlier "../" convention.
setwd("C:/43/paraalel/Fall2018-Proj3-Sec1-grp6")
# Super-resolution driver (defines superResolution.par()) and test helpers.
source("./lib/Superresolution.Parallel.R")
source("./lib/test.R")
# Held-out test set layout: LR inputs plus separate HR output folders for
# the two models. paste0() replaces paste(..., sep="").
test_dir <- "./data/test_sample/" # This will be modified for different data sets.
test_LR_dir <- paste0(test_dir, "LR/")          # low-resolution inputs
test_HR_dir_GBM <- paste0(test_dir, "HR/GBM/")  # GBM reconstructions
test_HR_dir_XGB <- paste0(test_dir, "HR/XGB/")  # XGBoost reconstructions


# Output HR versions using GBM
# Reload the saved GBM fit and reconstruct HR images from the LR test set,
# timing the whole super-resolution pass.
tm_test_gbm=NA
if(run.test){
  load(file="./output/fit_train.gbmpar.RData")
  tm_test_gbm <- system.time(superResolution.par(LR_dir=test_LR_dir, HR_dir=test_HR_dir_GBM, modelList=fit_train.gbmpar))
}

# Output HR versions using XGBoost
# Bug fix: the NA fallback previously initialized tm_xgb_test, but the
# timing report below reads tm_test_xgb — if run.test were FALSE the cat()
# would fail on an undefined variable.
tm_test_xgb <- NA
if(run.test){
  # Bug fix: the XGBoost model was saved as fit_train_xgb.RData (see the
  # save() in the training step); fit_train_xgbpar.RData never exists.
  load(file="./output/fit_train_xgb.RData")
  tm_test_xgb <- system.time(superResolution.par(LR_dir=test_LR_dir, HR_dir=test_HR_dir_XGB, modelList=fit_train.xgb))
}

Summarize Running Time

Prediction performance matters, so does the running times for constructing features and for training the model, especially when the computation resource is limited.

# Report per-stage running times.
# NOTE(review): system.time()[1] is user CPU time, not wall-clock time;
# use [3] ("elapsed") if wall-clock durations are intended.
cat("Time for constructing training features=", tm_feature_train[1], "s \n")
## Time for constructing training features= 111.21 s
#cat("Time for constructing testing features=", tm_feature_test[1], "s \n")
cat("Time for training GBM model=", tm_train_gbm[1], "s \n")
## Time for training GBM model= 0.11 s
cat("Time for GBM super-resolution=", tm_test_gbm[1], "s \n")
## Time for GBM super-resolution= 0.36 s
cat("Time for training XGBoost model=", tm_train_xgb[1], "s \n")
## Time for training XGBoost model= 0.31 s
cat("Time for XGBoost super-resolution=", tm_test_xgb[1], "s \n")
## Time for XGBoost super-resolution= 0.11 s